#!/usr/bin/env python
# encoding: utf-8
"""
@author: youfeng
@email: youfeng243@163.com
@license: Apache Licence
@file: repair_company_list.py
@time: 2018/1/2 11:02
"""

import sys

sys.path.append('..')

from common.mongo import MongDb
from logger import Logger

# Connection settings for the MongoDB database whose search-list tables this
# script rewrites.
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from configuration or the environment instead.
MONGO_DB_SOURCE = dict(
    host='172.16.215.2',
    port=40042,
    db='company_data',
    username='work',
    password='haizhi',
)

# Logging module (presumably 'repair_company_list.log' is the log file name;
# confirm against the project's Logger class).
log = Logger('repair_company_list.log').get_logger()

# Database handle shared by the whole script. It is built at import time, so
# merely importing this module constructs the MongDb object.
source_db = MongDb(
    MONGO_DB_SOURCE['host'],
    MONGO_DB_SOURCE['port'],
    MONGO_DB_SOURCE['db'],
    MONGO_DB_SOURCE['username'],
    MONGO_DB_SOURCE['password'],
    log=log,
)

# Province information: the keys substituted into the table-name pattern
# 'new_{}_search_list' by main(), processed in the order listed here.
# NOTE(review): 'gsxt' is not a province name (presumably the national
# enterprise credit site), and 'shanxi' / 'shanxicu' presumably distinguish
# Shanxi from Shaanxi -- confirm against the actual table names.
PROVINCE_LIST = [
    'gansu', 'gsxt', 'hunan', 'beijing',
    'tianjin', 'hebei', 'shandong', 'guangxi',
    'qinghai', 'jiangsu', 'ningxia', 'sichuan',
    'liaoning', 'guangdong', 'jilin', 'shanxi',
    'anhui', 'shanghai', 'xizang', 'hainan',
    'shanxicu', 'neimenggu', 'yunnan', 'hubei',
    'zhejiang', 'heilongjiang', 'chongqing', 'xinjiang',
    'henan', 'guizhou', 'fujian', 'jiangxi',
]


# Record fields whose string values are cleaned by main().
_CLEANED_FIELDS = ('status', 'register_code', 'register_date', 'history_name')

# Number of cleaned records buffered before each write-back.
_WRITE_BATCH_SIZE = 500

# A progress line is logged every this many records within one table.
_PROGRESS_INTERVAL = 1000

try:
    _STRING_TYPES = basestring  # Python 2: covers both str and unicode
except NameError:
    _STRING_TYPES = str  # Python 3 has no basestring builtin


def _strip_field_spaces(item):
    """Clean the string-valued fields listed in _CLEANED_FIELDS, in place.

    For each such field, surrounding whitespace is trimmed and every remaining
    ' ' character is removed. Fields that are missing or hold a non-string
    value (e.g. None) are left untouched.

    :param item: mapping-like record supporting ``get`` and item assignment.
    :return: the same ``item`` object, for convenience.
    """
    for field in _CLEANED_FIELDS:
        value = item.get(field)
        if isinstance(value, _STRING_TYPES):
            item[field] = value.strip().replace(' ', '')
    return item


def main():
    """Clean the search-list table of every entry in PROVINCE_LIST.

    For each province key the table ``new_<province>_search_list`` is read
    through ``source_db.traverse_batch``; every record has the fields in
    _CLEANED_FIELDS stripped of spaces and is then handed back to the same
    table through ``source_db.insert_batch_data`` in batches of
    _WRITE_BATCH_SIZE records. Progress is logged every _PROGRESS_INTERVAL
    records and once per finished province.

    NOTE(review): every record is written back, changed or not, into the table
    that is being traversed. This presumably relies on ``insert_batch_data``
    replacing documents by id rather than appending -- confirm in common.mongo.
    """
    total_count = 0
    for province in PROVINCE_LIST:
        table_name = 'new_{}_search_list'.format(province)

        result_list = []

        current_count = 0
        for current_count, item in enumerate(source_db.traverse_batch(table_name), 1):
            total_count += 1

            result_list.append(_strip_field_spaces(item))
            if len(result_list) >= _WRITE_BATCH_SIZE:
                source_db.insert_batch_data(table_name, result_list)
                # Emptied in place so the same list object keeps being reused.
                del result_list[:]

            if current_count % _PROGRESS_INTERVAL == 0:
                log.info("当前进度: province = {} current = {} total = {}".format(province, current_count, total_count))

        # Write out whatever is left over from the last, partially filled batch.
        if result_list:
            source_db.insert_batch_data(table_name, result_list)
            del result_list[:]

        log.info("当前完成省份: province = {} current = {} total = {}".format(province, current_count, total_count))

    log.info("全部省份数据完成调整...")


# Start the repair run only when this file is executed as a script; importing
# the module does not call main().
if __name__ == '__main__':
    main()
